In [45]:
import sys
sys.setrecursionlimit(10000)
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
In [3]:
import cPickle
import gzip

from breze.learn.data import one_hot
from breze.learn.base import cast_array_to_local_type
from breze.learn.utils import tile_raster_images

import climin.stops
import climin.initialize

from breze.learn import hvi
from breze.learn.hvi import HmcViModel
from breze.learn import hvi
from breze.learn.hvi.energies import (NormalGaussKinEnergyMixin, DiagGaussKinEnergyMixin)
from breze.learn.hvi.inversemodels import MlpGaussInvModelMixin

from matplotlib import pyplot as plt
from matplotlib import cm

import numpy as np

#import fasttsne

from IPython.html import widgets
%matplotlib inline

import theano
theano.config.compute_test_value = 'ignore'#'raise'
from theano import tensor as T
C:\Anaconda\lib\site-packages\scipy\lib\_util.py:35: DeprecationWarning: Module scipy.linalg.blas.fblas is deprecated, use scipy.linalg.blas instead
  DeprecationWarning)
C:\Anaconda\lib\site-packages\scipy\lib\_util.py:35: DeprecationWarning: Module scipy.linalg.blas.fblas is deprecated, use scipy.linalg.blas instead
  DeprecationWarning)
C:\Anaconda\lib\site-packages\scipy\lib\_util.py:35: DeprecationWarning: Module scipy.linalg.blas.fblas is deprecated, use scipy.linalg.blas instead
  DeprecationWarning)
C:\Anaconda\lib\site-packages\scipy\lib\_util.py:35: DeprecationWarning: Module scipy.linalg.blas.fblas is deprecated, use scipy.linalg.blas instead
  DeprecationWarning)
C:\Anaconda\lib\site-packages\scipy\lib\_util.py:35: DeprecationWarning: Module scipy.linalg.blas.fblas is deprecated, use scipy.linalg.blas instead
  DeprecationWarning)
C:\Anaconda\lib\site-packages\scipy\lib\_util.py:35: DeprecationWarning: Module scipy.linalg.blas.fblas is deprecated, use scipy.linalg.blas instead
  DeprecationWarning)
C:\Anaconda\lib\site-packages\scipy\lib\_util.py:35: DeprecationWarning: Module scipy.linalg.blas.fblas is deprecated, use scipy.linalg.blas instead
  DeprecationWarning)
C:\Anaconda\lib\site-packages\scipy\lib\_util.py:35: DeprecationWarning: Module scipy.linalg.blas.fblas is deprecated, use scipy.linalg.blas instead
  DeprecationWarning)

In [4]:
datafile = '../mnist.pkl.gz'
# Load data.                                                                                                   

with gzip.open(datafile,'rb') as f:                                                                        
    train_set, val_set, test_set = cPickle.load(f)                                                       

X, Z = train_set                                                                                               
VX, VZ = val_set
TX, TZ = test_set

Z = one_hot(Z, 10)
VZ = one_hot(VZ, 10)
TZ = one_hot(TZ, 10)

X_no_bin = X
VX_no_bin = VX
TX_no_bin = TX

# binarize the MNIST data
np.random.seed(0)
X  = np.random.binomial(1, np.tile(X, (5, 1))) * 1.0
VX = np.random.binomial(1, VX) * 1.0
TX = np.random.binomial(1, TX) * 1.0

image_dims = 28, 28

X, Z, VX, VZ, TX, TZ = [cast_array_to_local_type(i) for i in (X, Z, VX,VZ, TX, TZ)]
print X.shape
(250000L, 784L)

In [5]:
fig, ax = plt.subplots(figsize=(9, 9))

img = tile_raster_images(X[:64], image_dims, (8, 8), (1, 1))
ax.imshow(img, cmap=cm.binary)
Out[5]:
<matplotlib.image.AxesImage at 0x17ee9358>
In [6]:
fast_dropout = False

if fast_dropout:
    class MyHmcViModel(HmcViModel, 
                   hvi.FastDropoutMlpBernoulliVisibleVAEMixin, 
                   hvi.FastDropoutMlpGaussLatentVAEMixin, 
                   DiagGaussKinEnergyMixin,
                   MlpGaussInvModelMixin):
        pass

    kwargs = {
        'p_dropout_inpt': .1,
        'p_dropout_hiddens': [.2, .2],
    }

    print 'yeah'

else:
    class MyHmcViModel(HmcViModel, 
                   hvi.MlpBernoulliVisibleVAEMixin, 
                   hvi.MlpGaussLatentVAEMixin, 
                   DiagGaussKinEnergyMixin,
                   MlpGaussInvModelMixin):
        pass
    kwargs = {}


batch_size = 200
#optimizer = 'rmsprop', {'step_rate': 1e-4, 'momentum': 0.95, 'decay': .95, 'offset': 1e-6}
#optimizer = 'adam', {'step_rate': .5, 'momentum': 0.9, 'decay': .95, 'offset': 1e-6}
optimizer = 'adam', {'step_rate': 0.001}

# This is the number of random variables NOT the size of 
# the sufficient statistics for the random variables.
n_latents = 2
n_hidden = 200

m = MyHmcViModel(X.shape[1], n_latents, 
                 [n_hidden, n_hidden], ['rectifier'] * 2, 
                 [n_hidden], ['rectifier'] * 1, 
                 [n_hidden], ['rectifier'] * 1,
                 n_hmc_steps=3, n_lf_steps=4,
                 n_z_samples=1,
          optimizer=optimizer, batch_size=batch_size, allow_partial_velocity_update=False, perform_acceptance_step=False,
          **kwargs)

climin.initialize.randomize_normal(m.parameters.data, 0.1, 1e-1)
m.parameters.__setitem__(m.hmc_sampler.step_size_param, 0.1)
#m.parameters.__setitem__(m.kin_energy.mlp.layers[-1].bias, 1)
\\srv-file.brml.tum.de\nthome\cwolf\code\ml_support\theano\theano\gof\cmodule.py:324: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility
  rval = __import__(module_name, {}, {}, [module_name])

In [7]:
old_best_params = None
#print m.score(TX)
print m.parameters.data.shape
(514595L,)

In [8]:
FILENAME = 'hvi_gen2_recog1_aux1_late2_hid200_newbin_3hmc_04lf_1z.pkl'

# In[5]:
#old_best_params = None
f = open(FILENAME, 'rb')
np_array = cPickle.load(f)
old_best_params = cast_array_to_local_type(np_array)
f.close()
print old_best_params.shape
(514595L,)

In [9]:
m.parameters.data = old_best_params.copy()
old_best_loss = m.score(VX)
print old_best_loss
print m.score(TX)
compiled score function
142.6594375
143.665275

In [26]:
print m.estimate_nll(TX, 500)
136.245914548

In [10]:
print m.parameters.view(m.hmc_sampler.step_size_param) ** 2 + 1e-5
[ 0.00735745]

In [None]:
max_passes = 250
max_iter = max_passes * X.shape[0] / batch_size
n_report = X.shape[0] / batch_size

stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)

# theano.config.optimizer = 'fast_compile'

for i, info in enumerate(m.powerfit((X,), (VX,), stop, pause, eval_train_loss=False)):
    print i, info['loss'], info['val_loss']
    if i == 0 and old_best_params is not None:
        if info['best_loss'] > old_best_loss:
            info['best_loss'] = old_best_loss
            info['best_pars'] = old_best_params
    
    if info['best_loss'] == info['val_loss']:
        np.savetxt('hvi_short_basic_test.csv', m.parameters.data, delimiter=',')
In [11]:
print m.score(VX_no_bin)
print m.score(TX_no_bin)
142.386825
143.8346125

In [46]:
print 1/min(TX[TX>0])
print (np.random.binomial(1, min(TX[TX>0]), size=(1000,))*1.0).sum()
print (np.random.binomial(1, np.tile(TX, (5, 1)))*1.0).shape
256.0
8.0
(50000L, 784L)

In [12]:
f_z_init_sample = m.function(['inpt'], m.init_recog.sample())
f_z_sample = m.function(['inpt'], m.hmc_sampler.output)
f_gen = m.function([m.gen.inpt], m.gen.sample())
f_gen_rate = m.function([m.gen.inpt], m.gen.rate)
In [16]:
from theano import tensor as T
curr_pos = T.matrix('current_position')
curr_vel = T.matrix('current_velocity')
norm_noise = T.matrix('normal_noise')
unif_noise = T.vector('uniform_noise')

new_sampled_vel = m.hmc_sampler.kin_energy.sample(norm_noise)
updated_vel = m.hmc_sampler.partial_vel_constant * curr_vel + m.hmc_sampler.partial_vel_complement * new_sampled_vel
performed_hmc_steps = m.hmc_sampler.perform_hmc_steps(curr_pos, curr_vel)
hmc_step = m.hmc_sampler.hmc_step(curr_pos, curr_vel, np.float32(0), norm_noise, unif_noise)

f_perform_hmc_steps = m.function(['inpt', curr_pos, curr_vel], 
                                T.concatenate([performed_hmc_steps[0], performed_hmc_steps[1]], axis=1))
f_hmc_step = m.function(['inpt', curr_pos, curr_vel, norm_noise, unif_noise], 
                        T.concatenate([hmc_step[0], hmc_step[1]],axis=1), on_unused_input='warn')
f_kin_energy_sample_from_noise = m.function(['inpt', norm_noise], new_sampled_vel)
f_updated_vel_from_noise = m.function(['inpt', curr_vel, norm_noise], updated_vel)
\\srv-file.brml.tum.de\nthome\cwolf\code\2015-christopherwolf-msc\breze\arch\util.py:583: UserWarning: theano.function was asked to create a function computing outputs given certain inputs, but the provided input variable at index 5 is not part of the computational graph needed to compute the outputs: uniform_noise.
To make this warning into an error, you can pass the parameter on_unused_input='raise' to theano.function. To disable it completely, use on_unused_input='ignore'.
  on_unused_input=on_unused_input, updates=updates)

In [27]:
fig, axs = plt.subplots(3, 3, figsize=(27, 27))

### Original data

O = (X_no_bin[:64])[:, :784].astype('float32')
img = tile_raster_images(O, image_dims, (8, 8), (1, 1))
axs[0, 0].imshow(img, cmap=cm.binary)

O2 = (X[:64])[:, :784].astype('float32')
img = tile_raster_images(O2, image_dims, (8, 8), (1, 1))
axs[1, 0].imshow(img, cmap=cm.binary)

### Reconstruction

#z_sample = f_z_sample((X[:64]))
z_init_sample = f_z_init_sample((X[:64]))
z_sample = f_perform_hmc_steps((X[:64]), 
                               z_init_sample, 
                               f_kin_energy_sample_from_noise((X[:64]), 
                                                              np.random.normal(size=(64, m.n_latent)).astype('float32'))
                               )[-1, :64, :]

R = f_gen_rate(z_sample)[:, :784].astype('float32')
img = tile_raster_images(R, image_dims, (8, 8), (1, 1))
axs[0, 1].imshow(img, cmap=cm.binary)

Rinit = f_gen_rate(z_init_sample)[:, :784].astype('float32')
img = tile_raster_images(Rinit, image_dims, (8, 8), (1, 1))
axs[0, 2].imshow(img, cmap=cm.binary)

R2 = f_gen(z_sample)[:, :784].astype('float32')
img = tile_raster_images(R2, image_dims, (8, 8), (1, 1))
axs[1, 1].imshow(img, cmap=cm.binary)

Rinit2 = f_gen(z_init_sample)[:, :784].astype('float32')
img = tile_raster_images(Rinit2, image_dims, (8, 8), (1, 1))
axs[1, 2].imshow(img, cmap=cm.binary)


### Sampling

prior_sample = np.random.randn(64, m.n_latent).astype('float32')

S = f_gen_rate(prior_sample)[:, :784].astype('float32')
img = tile_raster_images(S, image_dims, (8, 8), (1, 1))
axs[2, 0].imshow(img, cmap=cm.binary)

S2 = f_gen(prior_sample)[:, :784].astype('float32')
img = tile_raster_images(S2, image_dims, (8, 8), (1, 1))
axs[2, 1].imshow(img, cmap=cm.binary)

#S3 = f_gen_rate(prior_sample)[:, :784].astype('float32')
img = tile_raster_images(S, image_dims, (8, 8), (1, 1))
axs[2, 2].imshow(img, cmap=cm.nipy_spectral)
Out[27]:
<matplotlib.image.AxesImage at 0x3e50e748>
In [23]:
from scipy.stats import norm as normal_distribution

unit_interval_positions = np.linspace(0.025, 0.975, 20)
positions = normal_distribution.ppf(unit_interval_positions)
print unit_interval_positions
print positions

latent_array = np.zeros((400, 2))

latent_array[:, 1] = -np.repeat(positions, 20)  # because images are filled top -> bottom, left -> right (row by row)
latent_array[:, 0] = np.tile(positions, 20)
        
fig, axs = plt.subplots(1, 1, figsize=(24, 24))

F = f_gen_rate(latent_array.astype('float32'))

img = tile_raster_images(F, image_dims, (20, 20), (1, 1))
#axs.imshow(img, cmap=cm.nipy_spectral)
axs.imshow(img, cmap=cm.binary)
[ 0.025  0.075  0.125  0.175  0.225  0.275  0.325  0.375  0.425  0.475
  0.525  0.575  0.625  0.675  0.725  0.775  0.825  0.875  0.925  0.975]
[-1.95996398 -1.43953147 -1.15034938 -0.93458929 -0.75541503 -0.59776013
 -0.45376219 -0.31863936 -0.18911843 -0.06270678  0.06270678  0.18911843
  0.31863936  0.45376219  0.59776013  0.75541503  0.93458929  1.15034938
  1.43953147  1.95996398]

Out[23]:
<matplotlib.image.AxesImage at 0x345a2e80>
In [28]:
from theano import tensor as T
from theano import clone

f_z_init_mean = m.function(['inpt'], m.init_recog.mean)
f_z_init_var = m.function(['inpt'], m.init_recog.var)

f_v_init_var = m.function(['inpt'], m.kin_energy.var)

full_sample = m.hmc_sampler.sample_with_path()
f_full_sample = m.function(['inpt'], T.concatenate([full_sample[0], full_sample[1]], axis=1))
In [29]:
#from theano.printing import debugprint
#debugprint(f_full_sample.theano_func)
In [30]:
final_pos = T.matrix('final_pos')
final_vel = T.matrix('final_vel')
inpt_replacements = {m.final_vel_model_inpt['position']: final_pos,
                     m.final_vel_model_inpt['time']: T.cast(m.hmc_sampler.n_hmc_steps, dtype='float32')}

final_vel_model_var = clone(m.final_vel_model.var, replace=inpt_replacements)
final_vel_model_mean = clone(m.final_vel_model.mean, replace=inpt_replacements)
final_vel_model_nll = clone(m.final_vel_model.nll(final_vel).sum(-1), replace=inpt_replacements)

f_v_final_var = m.function(['inpt', final_pos], final_vel_model_var)
f_v_final_mean = m.function(['inpt', final_pos], final_vel_model_mean)
f_v_final_model_nll = m.function(['inpt', final_pos, final_vel], final_vel_model_nll)
In [31]:
L = f_z_sample(X[:50000, :])
L_init = f_z_init_sample(X[:50000, :])
In [32]:
X_index = 0  # 5
num_repeats = 1000
In [33]:
#m.parameters.__setitem__(m.hmc_sampler.step_size_param, 0.08)
#m.parameters.__setitem__(m.kin_energy.variance_parameter, -0.5 * np.ones_like(m.kin_energy.variance_parameter))
#print m.parameters.view(m.kin_energy.variance_parameter)
#print m.parameters.view(m.hmc_sampler.step_size_param)

print f_z_init_mean(np.array([X[X_index, :]]))
print f_z_init_var(np.array([X[X_index, :]]))
print f_v_init_var(np.array([X[X_index, :]]))
#print f_v_final_var(np.array(X[:2]))
#print f_v_final_mean(np.array(X[:2]))
[[ 0.44820982 -0.12260872]]
[[ 0.08794454  0.02411422]]
[[ 0.70616812  0.7752651 ]]

In [34]:
repeated_X = np.tile(np.array([X[X_index, :]]), (num_repeats, 1)).astype('float32')

full_sample = f_full_sample(repeated_X)
z_samples = full_sample[:, :num_repeats, :]
v_samples = full_sample[:, num_repeats:, :]
In [35]:
print z_samples[m.n_hmc_steps, :, :].mean(axis=0)
print z_samples[m.n_hmc_steps, :, :].var(axis=0)
[ 0.44263726 -0.13144214]
[ 0.10377684  0.01897874]

In [36]:
z_sample_final_mean = z_samples[m.n_hmc_steps, :, :].mean(axis=0)
z_sample_final_std = z_samples[m.n_hmc_steps, :, :].std(axis=0)
variation_start = z_sample_final_mean - 2*z_sample_final_std
variation_end = z_sample_final_mean + 2*z_sample_final_std

final_vel_model_mean_output = np.zeros((m.n_latent, num_repeats, m.n_latent))
final_vel_model_var_output = np.zeros((m.n_latent, num_repeats, m.n_latent))

for variation_dim in range(m.n_latent):
    z_variation = np.linspace(variation_start[variation_dim], variation_end[variation_dim], num_repeats)
    sample_array = np.tile(z_sample_final_mean, (num_repeats, 1))
    sample_array[:, variation_dim] = z_variation
    final_vel_model_mean_output[variation_dim] = f_v_final_mean(repeated_X, sample_array)
    final_vel_model_var_output[variation_dim] = f_v_final_var(repeated_X, sample_array)
In [37]:
dim1 = 0
dim2 = 1
In [38]:
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
axs[0].scatter(L[:, dim1], L[:, dim2], c=Z[:].argmax(1), lw=0, s=5, alpha=.2)
axs[1].scatter(L_init[:, dim1], L_init[:, dim2], c=Z[:].argmax(1), lw=0, s=5, alpha=.2)

import matplotlib as mpl
cax = fig.add_axes([0.95, 0.2, 0.02, 0.6])
#norm = mpl.colors.Normalize(vmin=0, vmax=9)
#cbar = mpl.colorbar.ColorbarBase(cax, norm=norm, spacing='proportional')
cax.scatter(np.repeat(0, 10), np.arange(10), c=np.arange(10), lw=0, s=300)
cax.set_xlim(-0.1, 0.1)
cax.set_ylim(-0.5, 9.5)
plt.yticks(np.arange(10))
plt.tick_params(axis='x', which='both', bottom='off', top='off', labelbottom='off')
cax.tick_params(axis='y', colors='white')
for tick in cax.yaxis.get_major_ticks():
    tick.label.set_fontsize(14)
    tick.label.set_color('black')
    
cax.spines['bottom'].set_color('white')
cax.spines['top'].set_color('white') 
cax.spines['right'].set_color('white')
cax.spines['left'].set_color('white')

axs[0].set_title('After HMC steps')
axs[1].set_title('Initial recognition model')

axs[0].set_xlim(-3, 3)
axs[0].set_ylim(-3, 3)
axs[1].set_xlim(-3, 3)
axs[1].set_ylim(-3, 3)
Out[38]:
(-3, 3)
In [41]:
fig, axs = plt.subplots(4, 5, figsize=(20, 16))
colors = cm.jet(np.linspace(0, 1, 10))
for i in range(5):
    axs[0, i].scatter(L_init[Z[:].argmax(1) == i, dim1], L_init[Z[:].argmax(1) == i, dim2], c=colors[i], lw=0, s=5, alpha=.2)
    axs[1, i].scatter(L[Z[:].argmax(1) == i, dim1], L[Z[:].argmax(1) == i, dim2], c=colors[i], lw=0, s=5, alpha=.2)
    axs[0, i].set_title(str(i) + ' before HMC')
    axs[1, i].set_title(str(i) + ' after HMC')
    axs[2, i].scatter(L_init[Z[:].argmax(1) == (5+i), dim1], L_init[Z[:].argmax(1) == (5+i), dim2], c=colors[5+i], lw=0, s=5, alpha=.2)
    axs[3, i].scatter(L[Z[:].argmax(1) == (5+i), dim1], L[Z[:].argmax(1) == (5+i), dim2], c=colors[5+i], lw=0, s=5, alpha=.2)
    axs[2, i].set_title(str(5+i) + ' before HMC')
    axs[3, i].set_title(str(5+i) + ' after HMC')
    for j in range(4):
        axs[j, i].set_xlim(-3, 3)
        axs[j, i].set_ylim(-3, 3)
In [42]:
resolution = 200
lower_dim1_limit = z_sample_final_mean[dim1] - 0.5
upper_dim1_limit = z_sample_final_mean[dim1] + 0.5
lower_dim2_limit = z_sample_final_mean[dim2] - 0.5
upper_dim2_limit = z_sample_final_mean[dim2] + 0.5

pos = T.matrix()
pot_en = m.function([pos, m.inpt], m.hmc_sampler.eval_pot_energy(pos))
pot_energy_matrix = np.zeros((resolution, resolution), dtype='float32')
x = cast_array_to_local_type(np.linspace(lower_dim1_limit, upper_dim1_limit, resolution))
y = cast_array_to_local_type(np.linspace(lower_dim2_limit, upper_dim2_limit, resolution))
for i in range(resolution):
    for j in range(resolution):
        #pos_array = f_z_init_mean(np.array([X[X_index, :]]))
        pos_array = np.array([z_samples[m.n_hmc_steps, :, :].mean(axis=0)])
        pos_array[0, dim1] = x[i]
        pos_array[0, dim2] = y[j]
        pot_energy_matrix[j, i] = pot_en(pos_array, np.array([X[X_index, :]]))

fig, ax = plt.subplots(1, 1, figsize=(9, 9))
CS = ax.contour(x, y, pot_energy_matrix)
plt.clabel(CS, inline=1, fmt='%r', fontsize=10)
plt.show()
C:\Anaconda\lib\site-packages\matplotlib\contour.py:479: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:479: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:485: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:]])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:485: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:]])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:485: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:]])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:485: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:]])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:485: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:]])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:485: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:]])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:485: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:]])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:479: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:479: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:485: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:]])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:483: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[lc[:I[0] + 1], xy1])
C:\Anaconda\lib\site-packages\matplotlib\contour.py:485: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  nlc.append(np.r_[xy2, lc[I[1]:]])

In [47]:
fig, axs = plt.subplots(4, 2, figsize=(18, 36))

#contour_levels = (198, 200, 202, 204, 206, 208, 210)
#contour_levels = (130, 140, 150, 160, 180, 200, 240, 280)
#contour_levels = (180, 190, 200, 220, 240, 260, 280, 300, 320)
#contour_levels = (400, 402, 404, 406, 408, 410, 412, 416, 420)
#contour_levels = (106, 108, 110, 112, 114, 116, 118, 120, 124, 128)
contour_levels = (160, 165, 170, 175, 180, 185, 190, 200, 210, 215, 217, 220, 230, 240)

#CS0 = axs[0, 0].contourf(x, y, pot_energy_matrix, np.linspace(155, 240, 500))
CS0 = axs[0, 0].contour(x, y, pot_energy_matrix, contour_levels)
CS1 = axs[1, 0].contour(x, y, pot_energy_matrix, contour_levels)
CS2 = axs[2, 0].contour(x, y, pot_energy_matrix, contour_levels)
CS3 = axs[3, 0].contour(x, y, pot_energy_matrix, contour_levels)

def colour_for_z_samples(samples):
    mean = samples.mean(axis=0)
    mean1 = mean[dim1]
    mean2 = mean[dim2]
    colour = np.zeros_like(samples[:, 0])
    colour[np.logical_and(samples[:, dim1] < mean1,  samples[:, dim2] < mean2)] = 0
    colour[np.logical_and(samples[:, dim1] < mean1,  samples[:, dim2] >= mean2)] = 2
    colour[np.logical_and(samples[:, dim1] >= mean1, samples[:, dim2] < mean2)] = 4
    colour[np.logical_and(samples[:, dim1] >= mean1, samples[:, dim2] >= mean2)] = 7
    colour[((samples[:, dim1] - mean1) ** 2 + (samples[:, dim2] - mean2) ** 2) < 1e-3] = 9
    return colour.astype('int32')

colour = colour_for_z_samples(z_samples[m.n_hmc_steps,:,:])
print v_samples[m.n_hmc_steps, colour == 0, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 2, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 4, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 7, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 9, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 0, :].var(axis=0)
print v_samples[m.n_hmc_steps, colour == 2, :].var(axis=0)
print v_samples[m.n_hmc_steps, colour == 4, :].var(axis=0)
print v_samples[m.n_hmc_steps, colour == 7, :].var(axis=0)
print v_samples[m.n_hmc_steps, colour == 9, :].var(axis=0)



axs[0, 0].scatter(z_samples[0,:,dim1], z_samples[0,:,dim2], c=colors[colour_for_z_samples(z_samples[0,:,:])], s=20, alpha=.3, lw=0)
axs[1, 0].scatter(z_samples[1,:,dim1], z_samples[1,:,dim2], c=colors[colour_for_z_samples(z_samples[1,:,:])], s=20, alpha=.3, lw=0)
axs[2, 0].scatter(z_samples[2,:,dim1], z_samples[2,:,dim2], c=colors[colour_for_z_samples(z_samples[2,:,:])], s=20, alpha=.3, lw=0)
axs[3, 0].scatter(z_samples[3,:,dim1], z_samples[3,:,dim2], c=colors[colour_for_z_samples(z_samples[3,:,:])], s=20, alpha=.3, lw=0)

axs[0, 0].set_title('Initial recognition model')
axs[1, 0].set_title('After 1 HMC step')
axs[2, 0].set_title('After 2 HMC step')
axs[3, 0].set_title('After 3 HMC step')

axs[0, 1].scatter(v_samples[0,:,dim1], v_samples[0,:,dim2], c=colors[colour_for_z_samples(z_samples[0,:,:])], s=20, alpha=.3, lw=0)
axs[1, 1].scatter(v_samples[1,:,dim1], v_samples[1,:,dim2], c=colors[colour_for_z_samples(z_samples[1,:,:])], s=20, alpha=.3, lw=0)
axs[2, 1].scatter(v_samples[2,:,dim1], v_samples[2,:,dim2], c=colors[colour_for_z_samples(z_samples[2,:,:])], s=20, alpha=.3, lw=0)
axs[3, 1].scatter(v_samples[3,:,dim1], v_samples[3,:,dim2], c=colors[colour_for_z_samples(z_samples[3,:,:])], s=20, alpha=.3, lw=0)

plt.clabel(CS0, inline=1, fmt='%r', fontsize=10)
plt.clabel(CS1, inline=1, fmt='%r', fontsize=10)
plt.clabel(CS2, inline=1, fmt='%r', fontsize=10)
plt.clabel(CS3, inline=1, fmt='%r', fontsize=10)

print pot_energy_matrix.min()
print pot_energy_matrix.max()
axs[0, 0].scatter(f_z_init_mean(np.array([X[X_index,:]]))[0, dim1], f_z_init_mean(np.array([X[X_index,:]]))[0, dim2], c='black', s=20)

#axs[0, 1].set_xlim(-3, 3)
#axs[0, 1].set_ylim(-3, 3)
#axs[1, 1].set_xlim(-3, 3)
#axs[1, 1].set_ylim(-3, 3)
#axs[2, 1].set_xlim(-3, 3)
#axs[2, 1].set_ylim(-3, 3)
#axs[3, 1].set_xlim(-3, 3)
#axs[3, 1].set_ylim(-3, 3)
plt.show()
[-0.48640251 -0.50158674]
[ 0.36549684 -0.60839397]
[ 1.88490951  3.76053119]
[ 0.23782025  0.34295022]
[ nan  nan]
[ 1.76682651  1.74495685]
[ 2.80541182  2.84742117]
[ 1.9313978   2.96225786]
[ 1.2282933   2.25071526]
[ nan  nan]
178.793
358.41

In [44]:
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
axs[0].scatter(final_vel_model_mean_output[:, :, dim1], 
           final_vel_model_mean_output[:, :, dim2],  
           c=np.transpose(np.tile(np.linspace(0,m.n_latent-1,m.n_latent), (num_repeats, 1))), 
           lw=0, s=5)
axs[1].scatter(final_vel_model_var_output[:, :, dim1], 
           final_vel_model_var_output[:, :, dim2],  
           c=np.transpose(np.tile(np.linspace(0,m.n_latent-1,m.n_latent), (num_repeats, 1))), 
           lw=0, s=5)

plt.show()
In [121]:
final_vel_mean = f_v_final_mean(repeated_X, z_samples[3, :, :])
final_vel_var = f_v_final_var(repeated_X, z_samples[3, :, :])
final_vel_nll = f_v_final_model_nll(repeated_X, z_samples[3, :, :], v_samples[3, :, :])
In [122]:
fig, axs = plt.subplots(4, 2, figsize=(18, 36))
# TODO: Analysis of how final_vel_mean and final_vel_var depend on z (since they all share the same x)

print z_samples[3, :, :].mean(axis=0)
print z_samples[3, :, :].var(axis=0)
print v_samples[3, :, :].mean(axis=0)
print v_samples[3, :, :].var(axis=0)
print f_v_init_var(np.array([X[X_index, :]]))

print final_vel_nll.mean()
plt.boxplot(final_vel_nll, whis=1)
plt.show()
[ 0.21473025 -0.19174516 -0.07817629 -0.54020822  0.00570643  0.92683065
 -2.58824539 -2.66907525  0.02120089 -1.08640814  1.02576232  0.63050562
 -0.02567368 -0.01660508 -0.04360378 -1.11134505  0.92565304 -0.04196304
  1.43162405  1.4584192 ]
[ 0.3057653   0.06962163  0.03578784  0.0602233   0.87510037  0.1735246
  0.18416935  0.08336087  0.96069825  0.0362573   0.13567998  0.01490721
  0.97588456  0.00631169  1.06898785  0.02703508  0.01397526  0.97730625
  0.08239475  0.16031411]
[ 0.04449169  0.12222196 -0.1828669   0.04990669 -0.02095215 -0.03718433
 -0.2331453  -0.14117274  0.00426601 -0.11605906 -0.07840855 -0.09423795
  0.04892569 -0.05396308  0.03896911  0.1124519   0.05531485  0.01778946
  0.01325743  0.14188589]
[ 0.29676583  0.77735442  1.03890181  0.77550554  0.94352019  0.29334092
  0.32263157  0.56881511  0.92952013  1.19633138  0.34872594  1.15440321
  0.95708311  1.99418426  0.97389227  0.63270682  1.36505592  1.02891397
  0.56653243  0.42722607]
[[ 0.3069469   0.75371057  0.92495126  0.70866412  0.99649084  0.31643295
   0.32267544  0.58398938  1.00085127  1.21435797  0.33111885  1.06281757
   1.00400484  1.78664172  1.01869845  0.53073698  1.28049719  1.01012361
   0.56993032  0.42565113]]
25.2992

In [11]:
centers = np.zeros((10,n_latents))
stddevs = np.zeros((10,n_latents))
centers_init = np.zeros((10,n_latents))
stddevs_init = np.zeros((10,n_latents))
for i in range(10):
    Li = f_z_sample(X[Z.argmax(1) == i])
    centers[i] = Li.mean(axis=0)
    stddevs[i] = np.std(Li, axis=0)
    
    Li_init = f_z_init_sample(X[Z.argmax(1) == i])
    centers_init[i] = Li_init.mean(axis=0)
    stddevs_init[i] = np.std(Li_init, axis=0)
In [33]:
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
axs[0].scatter(centers[:, dim1], centers[:, dim2], c=range(10), s=50)
axs[0].scatter(centers_init[:, dim1], centers_init[:, dim2], c=range(10), s=50, marker=u's')

axs[1].scatter(centers[:, dim1], centers[:, dim2], c=range(10), s=50)
axs[1].scatter(centers[:, dim1] + stddevs[:, dim1], centers[:, dim2], c=range(10), s=50, marker=u'>')
axs[1].scatter(centers[:, dim1] - stddevs[:, dim1], centers[:, dim2], c=range(10), s=50, marker=u'<')
axs[1].scatter(centers[:, dim1], centers[:, dim2] + stddevs[:, dim2], c=range(10), s=50, marker=u'^')
axs[1].scatter(centers[:, dim1], centers[:, dim2] - stddevs[:, dim2], c=range(10), s=50, marker=u'v')

#axs[0].set_xlim(-1.2, 1.2)
#axs[0].set_ylim(-1.2, 1.2)
#axs[1].set_xlim(-1.2, 1.2)
#axs[1].set_ylim(-1.2, 1.2)

print (centers[:, dim1] - centers_init[:, dim1])
print (centers[:, dim2] - centers_init[:, dim2])
print (stddevs[:, dim1] - stddevs_init[:, dim1])
print (stddevs[:, dim2] - stddevs_init[:, dim2])
[ 2.19889748 -0.15354697 -0.34954457  0.87099415 -1.05555475  0.81190634
  4.02935266 -3.8288697  -0.60245502 -1.7203421 ]
[ 4.19529772 -4.50300527 -0.18695778 -0.12552167  0.44959015  0.74979743
  0.06509392 -0.19518258 -0.03151836 -0.00450408]
[-0.00488156 -0.01242882 -0.0063476  -0.00664794 -0.00611991 -0.00398558
 -0.00062197 -0.00353235 -0.01121491 -0.00401866]
[-0.0054999  -0.00590807 -0.00709569 -0.00490659 -0.00718015 -0.00599122
 -0.00477272 -0.00955963 -0.00638729 -0.00433201]

In [None]: